library(dplyr)
library(EIAapi)
library(jsonlite)
library(gt)
library(plotly)
source("../pipeline/eia_data.R")Data Backfill
The goal of this doc is to execute an initial data pull of the hourly demand for California balancing authority subregion (CISO). This includes the following four independent system operators:
- Pacific Gas and Electric (PGAE)
- Southern California Edison (SCE)
- San Diego Gas and Electric (SDGE)
- Valley Electric Association (VEA)
The data backfill process includes the following steps:
- Setting parameters and pulling the data
- Data quality checks
- Saving the data and creating a log file
Load Libraries and Functions
meta_json <- read_json(path = "../settings/settings.json")
s <- meta_json$series
series <- lapply(1:length(s), function(i) {
subba_id <- NULL
subba_id <- as.numeric(s[[i]]$subba_id)
if (!is.na(as.numeric(s[[i]]$subba_id))) {
subba_id <- as.numeric(s[[i]]$subba_id)
} else {
subba_id <- s[[i]]$subba_id
}
return(data.frame(
parent_id = s[[i]]$parent_id,
parent_name = s[[i]]$parent_name,
subba_id = subba_id,
subba_name = s[[i]]$subba_name
))
}) |>
bind_rows()
api_path <- meta_json$api_path
meta_path <- meta_json$meta_path
data_path <- meta_json$data_pathfacets_template <- list(
parent = NULL,
subba = NULL
)
start <- as.POSIXct(paste(
paste(
meta_json$start$year,
meta_json$start$month,
meta_json$start$day,
sep = "-"
),
" ",
meta_json$start$hour,
":00:00",
sep = ""
))
end <- as.POSIXct(paste(
paste(
meta_json$end$year,
meta_json$end$month,
meta_json$end$day,
sep = "-"
),
" ",
meta_json$end$hour,
":00:00",
sep = ""
))
# start <- as.POSIXct("2024-05-18 08:00:00")
# end <- as.POSIXct("2024-06-01 01:00:00")
attr(start, "tzone") <- "UTC"
attr(end, "tzone") <- "UTC"
offset <- 2200
eia_api_key <- Sys.getenv("EIA_API_KEY")metadata <- eia_metadata(api_key = eia_api_key, api_path = api_path)Warning: input string 'The api_key argument is missing... [0;92m❌[0m
' cannot be translated from 'ANSI_X3.4-1968' to UTF-8, but is valid UTF-8
Warning: input string 'The api_key argument is not valid... [0;92m❌[0m
' cannot be translated from 'ANSI_X3.4-1968' to UTF-8, but is valid UTF-8
Warning: input string 'The api_path argument is missing... [0;92m❌[0m
' cannot be translated from 'ANSI_X3.4-1968' to UTF-8, but is valid UTF-8
Warning: input string 'The api_path argument is not valid, must be a character object [0;92m❌[0m
' cannot be translated from 'ANSI_X3.4-1968' to UTF-8, but is valid UTF-8
Warning: input string 'Could not pull the metadata... [0;92m❌[0m
' cannot be translated from 'ANSI_X3.4-1968' to UTF-8, but is valid UTF-8
Warning: input string 'Could not parse the metadata JSON... [0;92m❌[0m
' cannot be translated from 'ANSI_X3.4-1968' to UTF-8, but is valid UTF-8
Warning: input string 'The api_key argument is missing... [0;92m❌[0m
' cannot be translated from 'ANSI_X3.4-1968' to UTF-8, but is valid UTF-8
Warning: input string 'The api_key argument is not valid... [0;92m❌[0m
' cannot be translated from 'ANSI_X3.4-1968' to UTF-8, but is valid UTF-8
Warning: input string 'The api_path argument is missing... [0;92m❌[0m
' cannot be translated from 'ANSI_X3.4-1968' to UTF-8, but is valid UTF-8
Warning: input string 'The api_path argument is not valid, must be a character object [0;92m❌[0m
' cannot be translated from 'ANSI_X3.4-1968' to UTF-8, but is valid UTF-8
Warning: input string 'Could not pull the metadata... [0;92m❌[0m
' cannot be translated from 'ANSI_X3.4-1968' to UTF-8, but is valid UTF-8
Warning: input string 'Could not parse the metadata JSON... [0;92m❌[0m
' cannot be translated from 'ANSI_X3.4-1968' to UTF-8, but is valid UTF-8
print(names(metadata)) [1] "id" "name" "description"
[4] "frequency" "facets" "data"
[7] "startPeriod" "endPeriod" "defaultDateFormat"
[10] "defaultFrequency" "command"
print(metadata$startPeriod)[1] "2018-06-19T05"
print(metadata$endPeriod)[1] "2024-06-28T07"
meta <- NULL
data <- NULL
for (i in 1:nrow(series)) {
facets <- facets_template
facets["parent"] <- series[i, "parent_id"]
facets["subba"] <- series[i, "subba_id"]
print(facets)
temp <- eia_backfill(
start = start,
end = end,
offset = offset,
api_key = eia_api_key,
api_path = paste(api_path, "data", sep = ""),
facets = facets
)
index <- seq.POSIXt(from = start, to = end, by = "hour")
ts_obj <- data.frame(period = index) |>
left_join(temp, by = c("period" = "time"))
meta_temp <- create_metadata(data = ts_obj, start = start, end = end, type = "backfill")
meta_temp$index <- 1
meta_df <- as.data.frame(meta_temp)
meta <- rbind(meta, meta_df)
data <- rbind(data, ts_obj)
}$parent
[1] "CISO"
$subba
[1] "PGAE"
$parent
[1] "CISO"
$subba
[1] "SCE"
$parent
[1] "CISO"
$subba
[1] "SDGE"
$parent
[1] "CISO"
$subba
[1] "VEA"
print(meta) index parent subba time start
1 1 CISO PGAE 2024-06-28 20:44:47 2018-07-01 08:00:00
2 1 CISO SCE 2024-06-28 20:45:41 2018-07-01 08:00:00
3 1 CISO SDGE 2024-06-28 20:46:32 2018-07-01 08:00:00
4 1 CISO VEA 2024-06-28 20:47:25 2018-07-01 08:00:00
end start_act end_act start_match end_match
1 2024-06-15 01:00:00 2018-07-01 08:00:00 2024-06-15 TRUE FALSE
2 2024-06-15 01:00:00 2018-07-01 08:00:00 2024-06-15 TRUE FALSE
3 2024-06-15 01:00:00 2018-07-01 08:00:00 2024-06-15 TRUE FALSE
4 2024-06-15 01:00:00 2018-07-01 08:00:00 2024-06-15 TRUE FALSE
n_obs na type update success
1 52233 546 backfill FALSE FALSE
2 52233 546 backfill FALSE FALSE
3 52233 546 backfill FALSE FALSE
4 52233 546 backfill FALSE FALSE
comments
1 The end argument does not match the actual; Missing values were found;
2 The end argument does not match the actual; Missing values were found;
3 The end argument does not match the actual; Missing values were found;
4 The end argument does not match the actual; Missing values were found;
# The initial pull has some missing values
head(data) period subba subba_name parent
1 2018-07-01 08:00:00 PGAE Pacific Gas and Electric CISO
2 2018-07-01 09:00:00 PGAE Pacific Gas and Electric CISO
3 2018-07-01 10:00:00 PGAE Pacific Gas and Electric CISO
4 2018-07-01 11:00:00 PGAE Pacific Gas and Electric CISO
5 2018-07-01 12:00:00 PGAE Pacific Gas and Electric CISO
6 2018-07-01 13:00:00 PGAE Pacific Gas and Electric CISO
parent_name value value_units
1 California Independent System Operator 12522 megawatthours
2 California Independent System Operator 11745 megawatthours
3 California Independent System Operator 11200 megawatthours
4 California Independent System Operator 10822 megawatthours
5 California Independent System Operator 10644 megawatthours
6 California Independent System Operator 10559 megawatthours
# Save the data
d <- append_data(data_path = data_path, new_data = data, init = TRUE, save = TRUE)[1] "Initial data pull"
[1] "Save the data to CSV file"
# Save the metadata
meta["success"] <- TRUE
meta["update"] <- TRUE
m <- append_metadata(meta_path = meta_path, new_meta = meta, init = TRUE, save = TRUE)[1] "Saving the metadata file"
Plot the Series
We will use Plotly to visualize the series:
d <- data |> arrange(subba, period)
d$subba <- as.character(d$subba)
p <- plot_ly(d, x = ~period, y = ~value, color = ~subba, type = "scatter", mode = "lines")
p